
* Start from an empty session and turn off output paging so the do-file runs without pausing
clear all
set more off

* Root folder of the replication files; the path is hard-coded, so it must be edited to run on another machine
local path0="C:\Dropbox\GeneticsProject (1)\REStat\round_accepted_replication_files\"

* Work inside the Workfiles subfolder: every dataset below is referenced by bare file name
cd "`path0'Workfiles\"

* Master dataset to which all later joinby steps attach variables by countrycode
use PWT_for_merge.dta, clear

* Baseline check for repeated country codes in the master file
duplicates report countrycode

* ---- DNA / blood data ----------------------------------------------
* YUG and MMR are removed outright; of the remaining rows only those
* found in both files (_merge equal to 3) are retained.
capture drop _merge
joinby countrycode using "DNA_blood_data.dta", unmatched(both)
tabulate _merge
drop if inlist(countrycode, "YUG", "MMR")
keep if _merge == 3
drop _merge
duplicates report countrycode


* ---- ICRG data ------------------------------------------------------
* Rows from both sides are kept, matched or not.
capture drop _merge
joinby countrycode using "ICRGdata.dta", unmatched(both)
tabulate _merge
drop _merge
duplicates report countrycode


* ---- Fractionalization data from Fearon -----------------------------
* Only rows matched on both sides survive: master-only (1) and
* using-only (2) rows are removed.
capture drop _merge
joinby countrycode using "Fearon_Fractionalization.dta", unmatched(both)
tabulate _merge
drop if inlist(_merge, 1, 2)
drop _merge
duplicates report countrycode


* ---- Cultural data from Licht et al ---------------------------------
* Unmatched rows from both sides are kept.
capture drop _merge
joinby countrycode using "Licht_culture", unmatched(both)
tabulate _merge
drop _merge
duplicates report countrycode


* ---- Schwartz culture, Hofstede culture, Hall-Jones TFP -------------
* The three files are attached in this order with the same recipe:
* join on countrycode, tabulate the match status, discard rows that
* exist only in the using file, then re-check for duplicate codes.
foreach src in Schwartz_data culture_Hofstede HallJones.dta {
	capture drop _merge
	joinby countrycode using "`src'", unmatched(both)
	tabulate _merge
	drop if _merge == 2
	drop _merge
	duplicates report countrycode
}


* ---- Sources attached with unmatched(master) ------------------------
* Every master row is kept and using-only rows never enter the data.
* Files, in join order:
*   data_blood_alternative_sources  alternative blood data
*   AJR_data                        settler mortality
*   distance_SP_UK.dta              Spolaore-Wacziarg distance (UK file)
*   distance_SP_US.dta              Spolaore-Wacziarg distance (US file)
*   legal_origins                   legal origin data
*   geo_data_relative_to_UK         geographic distance to the UK
local master_sources data_blood_alternative_sources AJR_data ///
	distance_SP_UK.dta distance_SP_US.dta ///
	legal_origins geo_data_relative_to_UK

foreach src of local master_sources {
	capture drop _merge
	joinby countrycode using "`src'", unmatched(master)
	tabulate _merge
	drop _merge
	duplicates report countrycode
}


* ---- Human capital: Barro-Lee ---------------------------------------
* Unmatched rows from both sides are kept.
capture drop _merge
joinby countrycode using "BarroLee_data_updated.dta", unmatched(both)
tabulate _merge
drop _merge
duplicates report countrycode

* ---- Maddison data --------------------------------------------------
* Using-only rows are discarded; the match status is tabulated before
* and after that step.
capture drop _merge
joinby countrycode using "Maddison.dta", unmatched(both)
tabulate _merge
drop if _merge == 2
tabulate _merge

drop _merge
duplicates report countrycode
* Remove rows that carry no country code
drop if countrycode == ""

* ---- Pathogen prevalence (Murray-Schaller file) ---------------------
* Master rows are always kept; _merge was already dropped just above.
joinby countrycode using "Pathogen_prevalence_MurraySchaller_April2015", unmatched(master)

tabulate _merge
drop _merge
duplicates report countrycode


* ---- Chiao-Blizinsky (2009) data ------------------------------------
* Using-only rows and rows without a country code are removed; the
* source-specific country name variable is dropped if it exists.
capture drop _merge
capture drop countryX
joinby countrycode using "5HTTLPR.dta", unmatched(both)

tabulate _merge
drop if _merge == 2
tabulate _merge
drop _merge
drop if countrycode == ""
duplicates report countrycode
capture drop country_Chiao_Blizinsky


* ---- Way and Lieberman (SCAN 2010) data -----------------------------
* Same treatment as the block above.
capture drop _merge
capture drop countryX
joinby countrycode using "WayLieberman-SCAN-2010.dta", unmatched(both)

tabulate _merge
drop if _merge == 2
tabulate _merge
drop _merge
drop if countrycode == ""
duplicates report countrycode
capture drop country_WayLieberman


* ---- Updated A118G and 5HTTLPR files --------------------------------
* Both are joined keeping unmatched rows from either side.
foreach src in updated_A118G.dta updated_5HTTLPR.dta {
	capture drop _merge
	joinby countrycode using "`src'", unmatched(both)
	tabulate _merge
	drop _merge
}


* Build HTTLPR: start from httlpr_s and fill in freq_5HTTLPR wherever the result is
* system missing (the ==. test does not match extended missing values .a-.z).
* NOTE(review): the two inputs presumably come from the 5HTTLPR files merged above - confirm.
gen HTTLPR=httlpr_s
replace HTTLPR=freq_5HTTLPR if HTTLPR==.
drop  httlpr_s  freq_5HTTLPR

* Build A118G the same way from allele_g with freq_118g as the fallback, then multiply by 100.
* NOTE(review): the rescaling assumes both inputs are on the same 0-1 scale - confirm.
gen A118G=allele_g
replace A118G=freq_118g if A118G==.
replace A118G=A118G*100
drop allele_g freq_118g

*** Override A118G with hard-coded values (already multiplied by 100 here) for six countries;
*** original note: replace with countries that have large sample sizes
*** NOTE(review): the code "GER" differs from the ISO alpha-3 code for Germany (DEU);
*** confirm which code countrycode uses, otherwise that replace changes no rows.
replace A118G=0.34768117*100 if countrycode=="MEX"
replace A118G=0.365895161*100 if countrycode=="CHN"
replace A118G=0.183910084*100 if countrycode=="FIN"
replace A118G=0.12065836*100 if countrycode=="GER"
replace A118G=.1020396475*100 if countrycode=="SWE"
replace A118G=.117*100 if countrycode=="IRL"



* ---- Barro religion data --------------------------------------------
* Using-only rows are discarded, then rows without a country code.
capture drop _merge
joinby countrycode using "barro-religion.dta", unmatched(both)
tabulate _merge
drop if _merge == 2
drop _merge

drop if countrycode == ""
duplicates report countrycode


* ---- Absolute values of the coordinates -----------------------------
* (The dataset itself is only saved at the very end of the do-file.)
generate abs_long = abs(longitude)
generate abs_lati = abs(latitude)

label variable abs_long "Absolute longitude"
label variable abs_lati "Absolute latitude"



*====================================================================
*		Blood-type distance of each country from GBR and from USA
*		Source variables: bloodA_wiki, bloodB_wiki
*
*		For each reference country four measures are created:
*		  distE  Euclidean distance in the (A, B) plane
*		  distM  Mahalanobis distance using the inverse of the sample
*		         covariance matrix of the two blood variables
*		  distA  absolute gap in blood type A
*		  distB  absolute gap in blood type B
*====================================================================

* Sample covariance matrix of the two variables and its inverse
correlate bloodA_wiki bloodB_wiki, covariance
matrix A = r(C)
matrix B = inv(A)

foreach ref in GBR USA {
	capture drop temp*

	* Copy the values of the reference country onto every row:
	* they are non-missing only on its own row, so max() spreads them
	gen temp_bloodA = bloodA_wiki if countrycode == "`ref'"
	gen temp_bloodB = bloodB_wiki if countrycode == "`ref'"
	egen temp1_bloodA = max(temp_bloodA)
	egen temp1_bloodB = max(temp_bloodB)

	* Gaps to the reference country, inserted as text in the formulas below
	local gapA bloodA_wiki-temp1_bloodA
	local gapB bloodB_wiki-temp1_bloodB

	gen distE_`ref'_wiki = sqrt( (`gapA')^2 + (`gapB')^2 )

	gen distM_`ref'_wiki = sqrt(  (`gapA')*(B[1,1]*(`gapA') + B[2,1]*(`gapB') ) ///
	                            + (`gapB')*(B[1,2]*(`gapA') + B[2,2]*(`gapB') ) )

	gen distA_`ref'_wiki = abs(`gapA')
	gen distB_`ref'_wiki = abs(`gapB')

	capture drop temp*
}

* Labels, UK first and then USA
label variable distE_GBR_wiki "Euclidian distance from UK; based on wiki"
label variable distM_GBR_wiki "Mahalanobis distance from UK; based on wiki"
label variable distE_USA_wiki "Euclidian distance from USA; based on wiki"
label variable distM_USA_wiki "Mahalanobis distance from USA; based on wiki"

label variable distA_USA_wiki "Linear distance from USA: blood type A; based on wiki"
label variable distB_USA_wiki "Linear distance from USA: blood type B; based on wiki"
label variable distA_GBR_wiki "Linear distance from UK: blood type A; based on wiki"
label variable distB_GBR_wiki "Linear distance from UK: blood type B; based on wiki"




*====================================================================
*		Compute blood distance relative to the UK (and US)
*		Use: Nomi book (variables bloodA_nomi, bloodB_nomi)
*
*		Creates, for each reference country GBR and USA:
*		  distE_*_nomi  Euclidean distance in the (A, B) plane
*		  distM_*_nomi  Mahalanobis distance, using the inverse of the
*		                sample covariance matrix of the two variables
*		  distA_*_nomi  absolute gap in blood type A
*		  distB_*_nomi  absolute gap in blood type B
*====================================================================

* Sample covariance matrix of the two blood variables and its inverse
cor  bloodA_nomi bloodB_nomi, cov
matrix A=r(C)
matrix B=inv(A)
 
 
foreach var in GBR USA {
	capture drop temp*
	* Reference-country values: non-missing only on the reference row ...
	gen temp_bloodA= bloodA_nomi if countrycode=="`var'"
	gen temp_bloodB= bloodB_nomi if countrycode=="`var'"

	* ... then spread to every row through the column maximum
	egen temp1_bloodA=max(temp_bloodA)
	egen temp1_bloodB=max(temp_bloodB)

	gen distE_`var'_nomi=sqrt( (bloodA_nomi-temp1_bloodA)^2 + (bloodB_nomi-temp1_bloodB)^2 )

	* Quadratic form d*B*d with d the vector of gaps in A and B
	gen distM_`var'_nomi=sqrt(  (bloodA_nomi-temp1_bloodA)*(B[1,1]*(bloodA_nomi-temp1_bloodA) + B[2,1]*(bloodB_nomi-temp1_bloodB) ) ///
						      + (bloodB_nomi-temp1_bloodB)*(B[1,2]*(bloodA_nomi-temp1_bloodA) + B[2,2]*(bloodB_nomi-temp1_bloodB) ) )
		
	gen distA_`var'_nomi=abs(bloodA_nomi-temp1_bloodA)
	gen distB_`var'_nomi=abs(bloodB_nomi-temp1_bloodB)	
		
	capture drop temp*
}

* Fix: these labels were previously attached to the _wiki variables, which
* overwrote the wiki labels and left the _nomi variables unlabeled.
label var distE_GBR_nomi "Euclidian distance from UK; based on Nomi"
label var distM_GBR_nomi "Mahalanobis distance from UK; based on Nomi"
label var distE_USA_nomi "Euclidian distance from USA; based on Nomi"
label var distM_USA_nomi "Mahalanobis distance from USA; based on Nomi"

label var distA_USA_nomi "Linear distance from USA: blood type A; based on Nomi"
label var distB_USA_nomi "Linear distance from USA: blood type B; based on Nomi"
label var distA_GBR_nomi "Linear distance from UK: blood type A; based on Nomi"
label var distB_GBR_nomi "Linear distance from UK: blood type B; based on Nomi"


* ---- EIU innovation data --------------------------------------------
* The 2007 and 2009 files are attached in turn; master rows are always kept.
foreach yr in 2007 2009 {
	capture drop _merge
	joinby countrycode using "EIU_innovation_`yr'", unmatched(master)
	tabulate _merge
	drop _merge
}


* Average the two yearly eiu_ppm values (missing when either year is missing),
* then discard the yearly inputs.
generate eiu_ppm = (eiu_ppm_2007 + eiu_ppm_2009)/2

label variable eiu_ppm "EIU: Patents per m"
drop eiu_ppm_200?


*~~~~~~~~~~~~~~~~~~~~~~~~
* Trust indexes (file VWS_trust); master rows are always kept
capture drop _merge
joinby countrycode using "VWS_trust", unmatched(master)
tabulate _merge
drop _merge

* Remove rows whose country code is empty or the literal text #N/A
drop if inlist(countrycode, "", "#N/A")

*====================================================================
*		Share of European descent (file final_diffusion)
*		Unmatched rows from both sides are kept, then rows without
*		a country code are removed
*====================================================================
capture drop _merge
joinby countrycode using "final_diffusion", unmatched(both)

tabulate _merge, missing
capture drop _merge
drop if countrycode == ""


*====================================================================
*		Measures of import of technology (file CominMestieri_processed)
*		Match status is listed country by country
*====================================================================
capture drop _merge
joinby countrycode using "CominMestieri_processed", unmatched(both)

tabulate countrycode _merge, missing
capture drop _merge
drop if countrycode == ""



*====================================================================
*		Putterman-Weil data (file PuttermanWeil)
*		Master rows are always kept
*====================================================================
capture drop _merge
joinby countrycode using "PuttermanWeil", unmatched(master)

tabulate countrycode _merge, missing
capture drop _merge
drop if countrycode == ""

* ---- Exclude the city-states SGP and HKG ----------------------------
drop if inlist(countrycode, "SGP", "HKG")

* ---- Shrink storage types and write the final dataset ---------------
compress
save "completed_data", replace


